import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.impute import SimpleImputer
# Load the raw case data, drop the columns the analysis does not use and
# give the remaining ones shorter names.
# NOTE(review): absolute Windows path -- adjust for the local environment.
df = (
    pd.read_csv('E:/covid_19_data.csv')
    .drop(columns=['SNo', 'LastUpdate'])
    .rename(columns={
        'ObservationDate': 'Date',
        'Province/State': 'State',
        'Country/Region': 'Country',
    })
)
df['Date'] = pd.to_datetime(df['Date'])

# Fill missing text values with a constant placeholder.  Only the text
# columns are handed to the imputer: feeding it the whole mixed-dtype frame
# makes it work on (and return) an object array, which would strip the
# datetime/float dtypes from every column and write the string placeholder
# into any missing count.  Missing counts are left as NaN, which pandas'
# sum() skips by default.
text_columns = ['State', 'Country']
imputer = SimpleImputer(strategy='constant', fill_value='missing_value')
df2 = df.copy()
df2[text_columns] = imputer.fit_transform(df[text_columns])
# Total the counts per country and date (collapsing the per-state rows).
# Only the count columns are selected: the grouping keys must not appear in
# the selection, otherwise they are aggregated themselves and reset_index()
# fails because 'Country' and 'Date' would already exist as columns.
df3 = (
    df2.groupby(['Country', 'Date'])[['Confirmed', 'Deaths', 'Recovered']]
    .sum()
    .reset_index()
)

# Distinct country names, in order of first appearance in df3.
Countries = df3['Country'].unique()
def _plot_case_counts(frame, title, ylabel):
    """Show a scatter plot of the confirmed, recovered and death counts.

    Args:
        frame: DataFrame with 'Confirmed', 'Recovered' and 'Deaths' columns,
            one row per plotted point.
        title: Text used as the plot title.
        ylabel: Text used as the y-axis label.
    """
    # The x axis is the row position in ``frame``, not the calendar date.
    days = np.arange(len(frame))
    series_colours = (
        ('Confirmed', 'blue'),
        ('Recovered', 'green'),
        ('Deaths', 'red'),
    )
    for column, colour in series_colours:
        plt.scatter(days, frame[column], color=colour, label=column)
    plt.title(title)
    plt.xlabel('Days since the first case')
    plt.ylabel(ylabel)
    plt.legend()
    plt.show()


# One figure per country.
for country in Countries:
    country_df = df3[df3['Country'] == country].reset_index(drop=True)
    _plot_case_counts(country_df, country, 'Number of cases')

# Worldwide totals per date.  As with any groupby, the grouping key ('Date')
# is kept out of the column selection so reset_index() can restore it.
df4 = df3.groupby('Date')[['Confirmed', 'Deaths', 'Recovered']].sum().reset_index()
c = df4  # short alias kept available as a module-level name
_plot_case_counts(df4, 'World', 'Number of Cases')